*-------------------------------------------------------------------------------
*						Appendix I Tab 1 and Appendix I Fig 1
*-------------------------------------------------------------------------------

** Set Path
* Absolute project folders. Every file reference below is built from these globals.
* Raw_data: folder holding the micro census files census2000.dta, census2010.dta and census2015.dta.
global Raw_data    	"G:\project-finished\Descriptive\Data"
* App_data: folder holding the Excel workbooks imported for columns (1) and (2).
global App_data    	"G:\project-finished\Descriptive\Appendix Data"
* Class_data: defined here but not referenced anywhere in this do-file.
global Class_data   "G:\project-finished\Descriptive\Classification"  
* Work_lab: working directory; the intermediate tabledata1-3 files are saved here via relative paths.
global Work_lab   	"G:\project-finished\Descriptive\Lab"
* Out_lab: destination of the log file and of the exported figures.
global Out_lab    	"G:\project-finished\Descriptive\Out" 

* Relative file names used later (tabledata1, tabledata2, tabledata3) resolve against this folder.
cd "$Work_lab"
                            
* Close any log left open by an earlier run; capture suppresses the error when none is open.
capture log close            
* Start a fresh log, overwriting the one from the previous run.
log using "$Out_lab\Appendix I Tab 1 and Appendix I Fig 1", replace 
* Do not pause output with --more-- prompts.
set more off     


**------------------------------------------------------------------------------
* Step1: Generate Data
* 						column (1) Long Table Census Data
**------------------------------------------------------------------------------
* Create an empty accumulator file so that each census year can be appended to it in turn
clear
set obs 0
save tabledata1, replace emptyok

* ---- Long-table census, 2000: employment by industry ----
import excel "$App_data\职业和行业就业分布.xlsx", sheet("2000行业-职业交叉") firstrow clear
* Only the first 18 sheet rows are considered; rows with an empty column B are discarded
keep in 1/18
keep if B != ""
keep 行业分类 总计

* The running row number of the remaining rows serves as the industry code
generate industry = _n

rename 行业分类 title
rename 总计 num_longtable
destring num_longtable, replace
compress

generate year = 2000
append using tabledata1
save tabledata1, replace


* ---- Long-table census, 2010: employment by industry ----
import excel "$App_data\职业和行业就业分布.xlsx", sheet("2010行业中类") firstrow clear
* Keep only the rows flagged "大" in the 维度 column
drop if 维度 != "大"

keep 分类 人数
rename (分类 人数) (title num_longtable)
destring num_longtable, replace

* The running row number of the remaining rows serves as the industry code
generate industry = _n

generate year = 2010
append using tabledata1
save tabledata1, replace

* ---- 2015: no long-table counts are available ----
* Reuse the 2010 sheet only to obtain the industry titles; num_longtable
* stays missing for these rows once they are appended to the other years.
import excel "$App_data\职业和行业就业分布.xlsx", sheet("2010行业中类") firstrow clear
drop if 维度 != "大"
rename 分类 title
keep title
generate industry = _n

generate year = 2015
append using tabledata1
save tabledata1, replace

**------------------------------------------------------------------------------
* Step1: Generate Data
* 						column (2) Literature Data
**------------------------------------------------------------------------------
* Create an empty accumulator file for the literature-based counts
clear
set obs 0
save tabledata2, replace emptyok

* One pass per year: reload the workbook, map the detailed industry codes to
* broad sectors, sum employment within each sector, and append to tabledata2.
foreach j in "2000" "2010" "2015" {

	import excel "$App_data\王亚菲行业就业测算结果.xlsx", sheet("Sheet1") firstrow clear

	if `j' == 2000 {
		* Year 2000: codes in Industry_94 are grouped into 16 sectors
		keep if Industry_94 != .
		keep Industry_94 year2000
		rename Industry_94 temp

		generate industry_20 = .
		replace industry_20 = 1  if inrange(temp, 1, 5)
		replace industry_20 = 2  if inrange(temp, 6, 12)
		replace industry_20 = 3  if inrange(temp, 13, 43)
		replace industry_20 = 4  if inrange(temp, 44, 46)
		replace industry_20 = 5  if inrange(temp, 47, 49)
		replace industry_20 = 6  if inrange(temp, 50, 51)
		replace industry_20 = 7  if inrange(temp, 52, 60)
		replace industry_20 = 8  if inrange(temp, 61, 67)
		replace industry_20 = 9  if inrange(temp, 68, 70)
		replace industry_20 = 10 if inrange(temp, 72, 74)
		replace industry_20 = 11 if inrange(temp, 75, 84)
		replace industry_20 = 12 if inrange(temp, 85, 87)
		replace industry_20 = 13 if inrange(temp, 89, 91)
		replace industry_20 = 14 if inrange(temp, 92, 93)
		replace industry_20 = 15 if inrange(temp, 94, 97)
		replace industry_20 = 16 if temp == 99
	}
	else {
		* Years 2010 and 2015: codes in Industry_12 are grouped into 20 sectors
		keep if Industry_12 != .
		keep Industry_12 year2010 year2015
		rename Industry_12 temp

		generate industry_20 = .
		replace industry_20 = 1  if inrange(temp, 1, 5)
		replace industry_20 = 2  if inrange(temp, 6, 11)
		replace industry_20 = 3  if inrange(temp, 13, 43)
		replace industry_20 = 4  if inrange(temp, 44, 46)
		replace industry_20 = 5  if inrange(temp, 47, 50)
		replace industry_20 = 6  if inrange(temp, 51, 59)
		replace industry_20 = 7  if inrange(temp, 60, 62)
		replace industry_20 = 8  if inrange(temp, 63, 65)
		replace industry_20 = 9  if inrange(temp, 66, 67)
		replace industry_20 = 10 if inrange(temp, 68, 71)
		replace industry_20 = 11 if temp == 72
		replace industry_20 = 12 if inrange(temp, 73, 74)
		replace industry_20 = 13 if inrange(temp, 75, 78)
		replace industry_20 = 14 if inrange(temp, 79, 81)
		replace industry_20 = 15 if inrange(temp, 82, 83)
		replace industry_20 = 16 if temp == 84
		replace industry_20 = 17 if inrange(temp, 85, 87)
		replace industry_20 = 18 if inrange(temp, 88, 92)
		replace industry_20 = 19 if inrange(temp, 93, 97)
		replace industry_20 = 20 if temp == 98
	}

	* Codes outside every range keep a missing sector and are summed into
	* their own (missing-industry) group by the collapse below.
	rename industry_20 industry
	collapse (sum) year`j', by(industry)
	rename year`j' num

	generate year = `j'

	append using tabledata2
	save tabledata2, replace
}


**------------------------------------------------------------------------------
* Step1: Generate Data
* 						column (3) Micro Census Data
**------------------------------------------------------------------------------
* Reset the session (data, labels, matrices, programs, ...) and create an
* empty accumulator file for the micro-census counts
clear all
set obs 0
save tabledata3, replace emptyok

* ---------- Industry structure, 2000 ---------- *
use "$Raw_data\census2000.dta", clear
keep if industry != . & industry != 0

* Map the industry code to 16 sectors (the original comment labels this CIC1994).
* The code is converted to text, two-character codes are left-padded with "0",
* and the first two characters are taken as the division number.
tostring industry, replace
replace industry = "0" + industry if strlen(industry) == 2

generate temp = substr(industry, 1, 2)
destring temp, replace

generate industry_16 = .
replace industry_16 = 1  if inrange(temp, 1, 5)
replace industry_16 = 2  if inrange(temp, 6, 12)
replace industry_16 = 3  if inrange(temp, 13, 43)
replace industry_16 = 4  if inrange(temp, 44, 46)
replace industry_16 = 5  if inrange(temp, 47, 49)
replace industry_16 = 6  if inrange(temp, 50, 51)
replace industry_16 = 7  if inrange(temp, 52, 60)
replace industry_16 = 8  if inrange(temp, 61, 67)
replace industry_16 = 9  if inrange(temp, 68, 70)
replace industry_16 = 10 if inrange(temp, 72, 74)
replace industry_16 = 11 if inrange(temp, 75, 84)
replace industry_16 = 12 if inrange(temp, 85, 87)
replace industry_16 = 13 if inrange(temp, 89, 91)
replace industry_16 = 14 if inrange(temp, 92, 93)
replace industry_16 = 15 if inrange(temp, 94, 97)
replace industry_16 = 16 if temp == 99
drop temp

* num is only a never-missing placeholder: counting it yields the number of
* sample records in each sector
generate num = _n
collapse (count) num, by(industry_16)

rename industry_16 industry
generate year = 2000
append using tabledata3
save tabledata3, replace

* ---------- Industry structure, 2010 ---------- *
use "$Raw_data\census2010.dta", clear

* Map the industry code to 20 sectors (the original comment labels this CIC2002)
rename _行业 industry
keep if industry != .

* Convert the code to text, left-pad two-character codes with "0", and take
* the first two characters as the division number
tostring industry, replace
replace industry = "0" + industry if strlen(industry) == 2

generate temp = substr(industry, 1, 2)
destring temp, replace

generate industry_20 = .
replace industry_20 = 1  if inrange(temp, 1, 5)
replace industry_20 = 2  if inrange(temp, 6, 11)
replace industry_20 = 3  if inrange(temp, 13, 43)
replace industry_20 = 4  if inrange(temp, 44, 46)
replace industry_20 = 5  if inrange(temp, 47, 50)
replace industry_20 = 6  if inrange(temp, 51, 59)
replace industry_20 = 7  if inrange(temp, 60, 62)
replace industry_20 = 8  if inrange(temp, 63, 65)
replace industry_20 = 9  if inrange(temp, 66, 67)
replace industry_20 = 10 if inrange(temp, 68, 71)
replace industry_20 = 11 if temp == 72
replace industry_20 = 12 if inrange(temp, 73, 74)
replace industry_20 = 13 if inrange(temp, 75, 78)
replace industry_20 = 14 if inrange(temp, 79, 81)
replace industry_20 = 15 if inrange(temp, 82, 83)
replace industry_20 = 16 if temp == 84
replace industry_20 = 17 if inrange(temp, 85, 87)
replace industry_20 = 18 if inrange(temp, 88, 92)
replace industry_20 = 19 if inrange(temp, 93, 97)
replace industry_20 = 20 if temp == 98
drop temp

* num is only a never-missing placeholder: counting it yields the number of
* sample records in each sector
generate num = _n
collapse (count) num, by(industry_20)

rename industry_20 industry
generate year = 2010
append using tabledata3
save tabledata3, replace

* ---------- Industry structure, 2015 ---------- *
use "$Raw_data\census2015.dta", clear

* industry is already a string here; discard records without a code
keep if industry != ""

* The first two characters of the code give the division number
generate temp = substr(industry, 1, 2)
destring temp, replace

* Map divisions to the same 20 sector numbers used for 2010. The division
* ranges differ from the 2010 mapping, and some sectors are assigned out of
* numeric order or from two separate ranges.
generate industry_20 = .
replace industry_20 = 1  if inrange(temp, 1, 5)
replace industry_20 = 2  if inrange(temp, 6, 12)
replace industry_20 = 3  if inrange(temp, 13, 43)
replace industry_20 = 4  if inrange(temp, 44, 46)
replace industry_20 = 5  if inrange(temp, 47, 50)
replace industry_20 = 8  if inrange(temp, 51, 52)   // retail
replace industry_20 = 6  if inrange(temp, 53, 60)   // transport
replace industry_20 = 9  if inrange(temp, 61, 62)   // accommodation and catering
replace industry_20 = 7  if inrange(temp, 63, 65)   // software
replace industry_20 = 10 if inrange(temp, 66, 69)   // finance
replace industry_20 = 11 if temp == 70
replace industry_20 = 12 if inrange(temp, 71, 72)
replace industry_20 = 13 if inrange(temp, 73, 75)
replace industry_20 = 14 if inrange(temp, 76, 79)
replace industry_20 = 15 if inrange(temp, 80, 82)
replace industry_20 = 16 if temp == 83
replace industry_20 = 17 if inrange(temp, 84, 85) | temp == 94
replace industry_20 = 18 if inrange(temp, 86, 90)
replace industry_20 = 19 if inrange(temp, 91, 93) | inrange(temp, 95, 96)
replace industry_20 = 20 if temp == 97

* num is only a never-missing placeholder: counting it yields the number of
* sample records in each sector
generate num = _n
collapse (count) num, by(industry_20)
compress

rename industry_20 industry
generate year = 2015
append using tabledata3
save tabledata3, replace



**------------------------------------------------------------------------------
* Step2: Display Data
**------------------------------------------------------------------------------
* Combine the three sources on (year, industry)
use tabledata1, clear

* Column (2): literature counts
merge 1:1 year industry using tabledata2, nogenerate
rename num literature

* Column (3): counts from the micro census samples
merge 1:1 year industry using tabledata3, nogenerate
replace num = 0 if num == .   // sector 20 has no population
rename num paper

* Column (1): long-table census counts
rename num_longtable longtable

* Convert each column from counts to percentage shares within year.
* The share is built as a new variable and then takes over the original name,
* so the three columns end up last in the variable order.
foreach col in longtable literature paper {
	tempvar yeartotal
	bysort year: egen `yeartotal' = total(`col')
	generate share_`col' = `col'/`yeartotal'*100
	drop `yeartotal' `col'
	rename share_`col' `col'
	format `col' %9.3f
}

sort year industry
drop industry

list


**------------------------------------------------------------------------------
* Step3: Plot Data
**------------------------------------------------------------------------------
* Ratio of this paper's employment share to each benchmark share.
* The ratio is missing wherever the benchmark share is missing or zero, and
* kdensity ignores missing values.
gen ratio_long=paper/longtable
gen ratio_literature=paper/literature

* Sample restriction for both density plots:
*   - rows with paper==0 are omitted because the sector has no population
*   - "其他行业" (other industries) is omitted because its industry content differs
* Both exclusions must hold at the same time, so the conditions are joined
* with & and the zero-share test is paper!=0. The earlier form
* (title!="其他行业" | paper==0) kept every zero-share row and so excluded
* nothing that the two notes above ask for.
twoway (kdensity ratio_long if title!="其他行业" & paper!=0), ///
scheme(plotplain) xtitle("本文就业占比/统计局就业占比") ///
ytitle("核密度") xline(1,lc(red)) xlabel(,nogrid) ylabel(,nogrid)
graph save "$Out_lab\appI_Fig1a",replace
graph export "$Out_lab\appI_Fig1a.png",replace


twoway (kdensity ratio_literature if title!="其他行业" & paper!=0), ///
scheme(plotplain) xtitle("本文就业占比/文献就业占比") ///
ytitle("核密度") xline(1,lc(red))  xlabel(,nogrid) ylabel(,nogrid)
graph save "$Out_lab\appI_Fig1b",replace
graph export "$Out_lab\appI_Fig1b.png",replace



* Delete the three intermediate datasets from the working directory
forvalues k = 1/3 {
	erase tabledata`k'.dta
}


* Finish the log opened at the top of the script
log close
